import scrapy
from bs4 import BeautifulSoup
from ..items import DangdangItem


class BookSpider(scrapy.Spider):
    """Crawl Dangdang search results for the keyword ``python``.

    Starting from the first search-result page, every book entry in the
    ``ul.bigimg`` list is turned into a ``DangdangItem`` and the "next page"
    link is followed until the site no longer offers one.
    """

    name = 'book'
    allowed_domains = ['dangdang.com']
    # http://search.dangdang.com/?key=python&act=input&page_index=100
    start_urls = ['http://search.dangdang.com/?key=python&act=input&page_index=1']

    def parse(self, response):
        """Yield one item per book on the page, then a request for the next page.

        Args:
            response: The Scrapy response for one search-result page.

        Yields:
            ``DangdangItem`` objects for each well-formed book entry, followed
            by a ``scrapy.Request`` for the next page when a next-page link is
            present.
        """
        soup = BeautifulSoup(response.text, 'lxml')
        bigimgs = soup.find('ul', class_='bigimg')

        if bigimgs is None:
            # ``find`` returns None when the result list is absent (empty
            # result page, changed layout, anti-bot page); do not crash.
            self.logger.warning('No book list found on %s', response.url)
        else:
            for book in bigimgs.find_all('li'):
                item = self._parse_book(book)
                if item is None:
                    self.logger.debug(
                        'Skipping malformed book entry on %s', response.url)
                    continue
                yield item

        next_href = response.xpath("//a[normalize-space(translate(text(),' ', ' '))='下一页']/@href").extract_first()
        if not next_href:
            # Without this guard ``urljoin(None)`` resolves to the current
            # page URL and, because the request bypasses the duplicate
            # filter, the last page would be re-crawled forever.
            return

        next_url = response.urljoin(next_href)
        self.logger.info('下一页:{}'.format(next_url))
        yield scrapy.Request(url=next_url, callback=self.parse, dont_filter=True)

    def _parse_book(self, book):
        """Build a ``DangdangItem`` from one ``<li>`` node, or None if malformed.

        Args:
            book: A BeautifulSoup tag for a single entry of the result list.

        Returns:
            A populated ``DangdangItem``, or ``None`` when any node or
            attribute the item needs is missing from the entry.
        """
        title = book.find('a', class_='pic')
        target = book.find('p', class_='name')
        price = book.find('span', class_='search_now_price')
        comment_num = book.find('p', class_='search_star_line')
        information = book.find('p', class_="search_book_author")

        nodes = (title, target, price, comment_num, information)
        if any(node is None for node in nodes):
            return None

        link = target.find('a')
        href = link.get('href') if link is not None else None
        title_text = title.get('title')
        spans = information.find_all('span')
        # Author is the first span, publication time the second and the
        # press the last one, so at least two spans are required.
        if href is None or title_text is None or len(spans) < 2:
            return None

        item = DangdangItem()
        item['title'] = title_text
        item['link'] = href
        item['target'] = target.text
        item['price'] = price.text
        item['comment_num'] = comment_num.text
        item['author'] = spans[0].text
        item['press'] = spans[-1].text
        item['time'] = spans[1].text
        return item